#!/usr/bin/python
'''
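This takes 5 directories (human, wallaby, mouse, possum, platypus, in that
order) and, for each file name found in the wallaby directory, reports how
many entries overlap between the species.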
'''
import sys
import os

# parameters: five directories, one per species, in this fixed order.
# Fail with a usage message instead of an IndexError traceback when an
# argument is missing; any extra arguments are ignored, as before.
if len(sys.argv) < 6:
	sys.exit("usage: %s HUMAN_DIR WALLABY_DIR MOUSE_DIR POSSUM_DIR PLATYPUS_DIR" % sys.argv[0])
human_d, wallaby_d, mouse_d, possum_d, platypus_d = sys.argv[1:6]

# read data into sets.
def read_data(d):
	'''
	Collect the ROVE/HANNIBAL lines of every file in directory d.

	Each entry returned by os.listdir(d) is opened as a file in binary
	mode. Every line is stripped of surrounding whitespace and kept only
	if it contains the marker "ROVE" or "HANNIBAL".

	Args:
		d: path of the directory to scan.

	Returns:
		dict mapping each file name (not the full path) to the set of
		distinct kept lines, as byte strings (plain str on Python 2).
	'''
	# Byte-string markers: the files are opened in binary mode, so on
	# Python 3 each line is bytes and a text needle would raise TypeError.
	# On Python 2 a b"" literal is an ordinary str, so nothing changes.
	markers = (b"ROVE", b"HANNIBAL")
	res_dict = {}

	for df in os.listdir(d):
		# "with" closes the handle even if reading fails part-way.
		with open(os.path.join(d, df), "rb") as fin:
			stripped = (line.strip() for line in fin)
			# A kept line always contains a marker, so it can never be
			# empty; no separate blank-line check is needed.
			res_dict[df] = set(
				ln for ln in stripped if any(m in ln for m in markers))

	# return all results.
	return res_dict
		
# Load each species directory, in the same order as the command-line
# arguments; every result is a dict of file name -> set of entries.
human, wallaby, mouse, possum, platypus = map(
	read_data, (human_d, wallaby_d, mouse_d, possum_d, platypus_d))

# Per file: how many entries occur in every one of the five species.
# File names are taken from the wallaby directory.
print("shared in all mammals:")
for fname in wallaby:
	in_all = set.intersection(
		human[fname], wallaby[fname], mouse[fname], possum[fname], platypus[fname])
	print("%s %d" % (fname, len(in_all)))

# Per file: entries shared by human, wallaby, mouse and possum that are
# absent from platypus.
print("shared in all human, mouse, wallaby, opposum not in platypus:")
for t in wallaby:
	# Intersection, not union: the heading promises entries present in
	# *all* four species, and every other section of this report combines
	# its "shared" species with "&". The previous "|" counted entries found
	# in any one of the four.
	shared = human[t] & wallaby[t] & mouse[t] & possum[t]
	not_in_platypus = shared - platypus[t]
	print("%s %d" % (t, len(not_in_platypus)))
	
# Per file: entries common to wallaby and platypus that none of mouse,
# possum or human have.
print("shared in all wallaby, platypus not in human, mouse, possum:")
for fname in wallaby:
	pair = wallaby[fname] & platypus[fname]
	# difference() with several arguments removes the union of them.
	exclusive = pair.difference(mouse[fname], possum[fname], human[fname])
	print("%s %d" % (fname, len(exclusive)))
	
# Per file: entries common to human, mouse and wallaby that neither possum
# nor platypus has.
print("shared in all human, mouse, wallaby not in platypus, possum:")
for fname in wallaby:
	trio = human[fname] & mouse[fname] & wallaby[fname]
	exclusive = trio.difference(possum[fname], platypus[fname])
	print("%s %d" % (fname, len(exclusive)))

# Stop here: the per-file sections printed above are the script's entire
# output.
sys.exit()
# NOTE(review): everything below is unreachable because of the sys.exit()
# call above. It applies set operators directly to human, mouse, wallaby,
# possum and platypus, which read_data() returns as dicts (file name -> set),
# so it appears to predate the per-file layout and would likely raise a
# TypeError if it were ever executed. Confirm before reviving or removing.

# print human,mouse,wallaby,possum, not in platypus.
tmp = human & mouse & wallaby & possum
tmp1 = tmp.difference(platypus)
print "shared in human, mouse, possum, wallaby but not platypus", len(tmp1)

# print wallaby, platypus, not in anything else.
tmp = wallaby & platypus
tmp1 = tmp.difference(human, mouse, possum)
print "shared in wallaby, platypus, but not in human, mouse, possum", len(tmp1)

# print wallaby, human, mouse.
tmp = wallaby & human & mouse
tmp1 = tmp.difference(possum, platypus)
print "shared in wallaby, human, mouse, not in possum, platypus", len(tmp1)

# print wallaby, possum only.
tmp = wallaby & possum
tmp1 = tmp.difference(mouse, human, platypus)
print "shared in wallaby, possum, not in human, mouse, platypus", len(tmp1)

# print wallaby.
tmp = wallaby
tmp1 = tmp.difference(mouse, human, platypus, possum)
print "only in wallaby", len(tmp1)

# all lineages: at least one of human/mouse, at least one of
# possum/wallaby, and platypus.
tmp1 = human | mouse
tmp2 = possum | wallaby
tmp3 = platypus
tmp4 = tmp1 & tmp2 & tmp3
print "all lineages", len(tmp4)
